package packt.opennlpexamples; import com.aliasi.tokenizer.EnglishStopTokenizerFactory; import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; import com.aliasi.tokenizer.LowerCaseTokenizerFactory; import com.aliasi.tokenizer.Tokenizer; import com.aliasi.tokenizer.TokenizerFactory; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import static java.lang.System.out; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import opennlp.tools.cmdline.parser.ParserTool; import opennlp.tools.namefind.NameFinderME; import opennlp.tools.namefind.TokenNameFinderModel; import opennlp.tools.parser.Parse; import opennlp.tools.parser.Parser; import opennlp.tools.parser.ParserFactory; import opennlp.tools.parser.ParserModel; import opennlp.tools.postag.POSModel; import opennlp.tools.postag.POSTaggerME; import opennlp.tools.tokenize.TokenizerME; import opennlp.tools.tokenize.TokenizerModel; import opennlp.tools.util.Sequence; import opennlp.tools.util.Span; public class NLPExamples { private String sentence = "Let's parse this sentence."; public void POSExample() { try (InputStream input = new FileInputStream( new File("en-pos-maxent.bin"));) { // To lower case example String lowerCaseVersion = sentence.toLowerCase(); out.println(lowerCaseVersion); // Pull out tokens List<String> list = new ArrayList<>(); Scanner scanner = new Scanner(sentence); while (scanner.hasNext()) { list.add(scanner.next()); } // Convert list to an array String[] words = new String[1]; words = list.toArray(words); // Build model POSModel posModel = new POSModel(input); POSTaggerME posTagger = new POSTaggerME(posModel); // Tag words String[] posTags = posTagger.tag(words); for (int i = 0; i < posTags.length; i++) { out.println(words[i] + " - " + posTags[i]); } // Find top sequences Sequence sequences[] = posTagger.topKSequences(words); for (Sequence sequence : sequences) { out.println(sequence); } } catch (IOException ex) { ex.printStackTrace(); } } public void NERExample() {//Models found at http://opennlp.sourceforge.net/models-1.5/ try (InputStream tokenStream = new FileInputStream(new File("en-token.bin")); InputStream personModelStream = new FileInputStream( new File("en-ner-person.bin"));) { TokenizerModel tm = new TokenizerModel(tokenStream); TokenizerME tokenizer = new TokenizerME(tm); TokenNameFinderModel tnfm = new TokenNameFinderModel(personModelStream); NameFinderME nf = new NameFinderME(tnfm); String sentence = "Mrs. Wilson went to Mary's house for dinner."; String[] tokens = tokenizer.tokenize(sentence); Span[] spans = nf.find(tokens); for (int i = 0; i < spans.length; i++) { out.println(spans[i] + " - " + tokens[spans[i].getStart()]); } } catch (Exception ex) { ex.printStackTrace(); } try (InputStream tokenStream = new FileInputStream("en-token.bin"); InputStream locationModelStream = new FileInputStream( new File("en-ner-location.bin"));) { TokenizerModel tm = new TokenizerModel(tokenStream); TokenizerME tokenizer = new TokenizerME(tm); TokenNameFinderModel tnfm = new TokenNameFinderModel(locationModelStream); NameFinderME nf = new NameFinderME(tnfm); sentence = "Enid is located north of Oklahoma City."; // sentence = "Pond Creek is located north of Oklahoma City."; String tokens[] = tokenizer.tokenize(sentence); Span spans[] = nf.find(tokens); for (int i = 0; i < spans.length; i++) { out.println(spans[i] + " - " + tokens[spans[i].getStart()]); } } catch (Exception ex) { ex.printStackTrace(); } } public void extractReltionships() { try (InputStream modelInputStream = new FileInputStream( new File("en-parser-chunking.bin"));) { ParserModel parserModel = new ParserModel(modelInputStream); Parser parser = ParserFactory.create(parserModel); String sentence = "Let's parse this sentence."; Parse[] parseTrees = ParserTool.parseLine(sentence, parser, 3); for(Parse tree : parseTrees) { tree.show(); out.println("Probability: " + tree.getProb()); } for(Parse tree : parseTrees) { out.println("Probability: " + tree.getProb()); } } catch (Exception ex) { ex.printStackTrace(); } } public NLPExamples() { // POSExample(); // scannerClassExample(); // lingPipeExamples(); // NERExample(); // extractReltionships(); } public static void main(String[] args) { new NLPExamples(); } }